About

This markdown explores the weighted gene correlation networks of Pristina leidyi from a graph quantitative approach.

We have defined a number of functions that work with igraph objects and allow to annotate genes inside the networks with different properties. We can later on explore network metrics based on different properties of the genes (module membership, functional annotation, etc.)

We start by loading the necessary packages, importantly: wgcna, igraph, dplyr and ggplot2.

library(data.table)
library(reshape2)
library(plyr)
library(dplyr)
library(tidyverse)
library(ComplexHeatmap)
library(ggplot2)
library(ggrepel)
library(circlize)
library(RColorBrewer)
library(viridis)
library(colorspace)
library(WGCNA)
library(igraph)
library(topGO)

Loading custom code

We load here the necessary functions to work with graphs as stated above.

source("./code/R_functions/wgcna_igraph_functions/wgcna_igraph_functions.R")
source("./code/R_functions/wgcna_igraph_functions/topGO_wrap_function.R")

Loading previously-generated outputs from WGCNA

load("./outputs/rda/03_wgcna_analysis.rda")

Generating a set of gene attributes

We will ‘annotate’ the graph using several information sources. These files have a geneid – trait long format.

We will read them from a source file.

load("./data/06_graph_analysis_data/wgcna_graph_annotation.rda")

This includes: * Transcription factor class (if any; both class and color for visualisation)

head(plei_tfs_graph_annotation)
##                  id TFclass      color
## 1 PrileiEVm006188t1    A_20 rosybrown3
## 2 PrileiEVm016861t1    A_20 rosybrown3
## 3 PrileiEVm018456t1    A_20 rosybrown3
## 4 PrileiEVm018745t1    A_20 rosybrown3
## 5 PrileiEVm007500t1    AP_2      cyan3
## 6 PrileiEVm000374t1    ARID    magenta
head(plei_modules_graph_annotation)
##                  id                    module newcolor
## 1 PrileiEVm010084t1 module_anterior_intestine  #228B22
## 2 PrileiEVm008526t1 module_anterior_intestine  #228B22
## 3 PrileiEVm015441t1 module_anterior_intestine  #228B22
## 4 PrileiEVm010557t1 module_anterior_intestine  #228B22
## 5 PrileiEVm015449t1 module_anterior_intestine  #228B22
## 6 PrileiEVm001453t1 module_anterior_intestine  #228B22
head(plei_genecolor_graph_annotation)
##                                  id genecolor
## PrileiEVm000001t1 PrileiEVm000001t1   #FA9E42
## PrileiEVm000002t1 PrileiEVm000002t1   #FD6B6B
## PrileiEVm000003t1 PrileiEVm000003t1   #ECE44C
## PrileiEVm000004t1 PrileiEVm000004t1   #78A4D0
## PrileiEVm000005t1 PrileiEVm000005t1   #D26666
## PrileiEVm000006t1 PrileiEVm000006t1   #FFBE96
head(plei_functional_categories_graph_annotation)
##                  id funcat
## 1 PrileiEVm000001t1      O
## 2 PrileiEVm000002t1      I
## 3 PrileiEVm000003t1      L
## 4 PrileiEVm000004t1      T
## 5 PrileiEVm000005t1      W
## 6 PrileiEVm000006t1      P

We merge here all the attributes in a list that can be looped over by our code. If we had more annotation information, we can add it here.

# Merge
plei_attributes_list <- list(
  plei_tfs_graph_annotation,
  plei_modules_graph_annotation,
  plei_genecolor_graph_annotation,
  plei_functional_categories_graph_annotation #,
  # plei_TDHK_graph_annotation, # trans-developmental as in Marletaz et al., 2018
  # plei_TFEG_graph_annotation # distinction TF / non-TF as in Levy et al., 2021
  )

Graph construction

We will use igraph’s graph_from_adjacency_matrix to generate a graph using the topology overlap matrix as source.

plei_graph0 <- graph_from_adjacency_matrix(
  adjmatrix = TOM_2,
  add.colnames = "name",
  mode = "upper",
  weighted = TRUE,
  diag = FALSE,
)

At this point we have a massive graph object (256M values) where every gene is connected to every gene with a minimum value. To prune the graph from sparse interactions, we explore the interactions between pairs of genes as present in the TOM matrix (see markdown #03):

summary(TOM_2[1:1000000])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000019 0.0001649 0.0003643 0.0179345 0.0012513 1.0000000

We can see that the vast majority of genes do not interact to each other (uppermost quantiles are near the minimum). We can see this in a density plot too.

All interactions

plot(
  density(
    TOM_2[1:1000000]
    )
  , main = "TOM Density values"
  )

Interactions above 0.01 (a very small value of TOM score)

filt_small_values <- TOM_2[1:1000000] > 0.01  # disregard small values

plot(
  density(
    TOM_2[1:1000000][filt_small_values]
    )
  , main = "TOM Density values ( > 0.01)"
  )

We subset the graph edges based on these numbers. For a start, we choose 0.4 .

plei_graph <- subgraph.edges(
  plei_graph0,
  eids = which(E(plei_graph0)$weight >= 0.35),
  delete.vertices = TRUE
) # perhaps also load from memory?

We can parse this graph to ‘annotate’ the vertices (genes) using one of our custom functions:

plei_parsenetwork <- ParseNetwork(plei_graph, plei_attributes_list)

We extract the graph and the attributes data frame.

plei_graph2 <- plei_parsenetwork[[1]]
plei_df_attr <- plei_parsenetwork[[2]]

We can see this graph now has information of TF class, color, functional category, module etc.

plei_graph2
## IGRAPH 5f910ae UNW- 6300 464099 -- 
## + attr: name (v/c), index (v/n), TFclass (v/c), color (v/c), module
## | (v/c), newcolor (v/c), genecolor (v/c), funcat (v/c), weight (e/n)
## + edges from 5f910ae (vertex names):
##  [1] PrileiEVm000002t1--PrileiEVm000068t1 PrileiEVm000002t1--PrileiEVm000171t1
##  [3] PrileiEVm000002t1--PrileiEVm000475t1 PrileiEVm000002t1--PrileiEVm000583t1
##  [5] PrileiEVm000002t1--PrileiEVm000742t1 PrileiEVm000002t1--PrileiEVm000791t1
##  [7] PrileiEVm000002t1--PrileiEVm000819t1 PrileiEVm000002t1--PrileiEVm000869t1
##  [9] PrileiEVm000002t1--PrileiEVm001102t1 PrileiEVm000002t1--PrileiEVm001281t1
## [11] PrileiEVm000002t1--PrileiEVm001314t1 PrileiEVm000002t1--PrileiEVm001388t1
## [13] PrileiEVm000002t1--PrileiEVm001532t1 PrileiEVm000002t1--PrileiEVm002285t1
## + ... omitted several edges

The number of genes in the graph, calculated as the length of the ‘vertices’ subset of the graph:

vcount(plei_graph2)
## [1] 6300

##Divide into components

We can plot the network for a quick check.

We will first generate a layout for the graph that we will store for later use.

plei_layout_kk <- layout_with_kk(
  plei_graph2,
  maxiter = 100 * vcount(plei_graph2),
  kkconst = vcount(plei_graph2)
  )

Now for the plot:

plot(
    main = "Pristina leidyi WGCNA network,\nKamida-Kawai (kk) layout",
    plei_graph2,
    vertex.size = 1.5,
    vertex.label = NA,
    vertex.color = "gray",
    edge.color = rgb(0,0,0,0.1),
    layout = plei_layout_kk
)

We can observe that not every gene has the same amount of connections and that there seem to be compartmentalised modules. Do these correspond to the WGCNA modules?

For that, we can divide the graph into so-called components using the components tool from igraph.

plei_id_component <- data.frame(
  id = names(components(plei_graph2,mode=c("strong"))$membership),
  member = components(plei_graph2,mode=c("strong"))$membership
) %>% remove_rownames
head(plei_id_component)
##                  id member
## 1 PrileiEVm000002t1      1
## 2 PrileiEVm000004t1      2
## 3 PrileiEVm000005t1      3
## 4 PrileiEVm000006t1      4
## 5 PrileiEVm000008t1      3
## 6 PrileiEVm000009t1      5

And for a quick check on the number of genes per connected component

mainCCs <- data.frame(
  CC = names(table(plei_id_component$member)),
  ngenes = as.data.frame(table(plei_id_component$member))[,2]
) %>%
  remove_rownames %>%
  arrange(desc(ngenes))
  
head(mainCCs)
##   CC ngenes
## 1  2   1103
## 2  4    959
## 3  3    625
## 4 14    494
## 5  1    445
## 6  9    357

Classification agreement between pairs of genes

We compare this classification between the wgcna clustering, the binning by max ctype expression, and the mainCC membership

With this, we have one gene : one component. This is another way of classifying the genes. We can thus asses whether genes end up classified in a similar way as they did with WGCNA (using the Adjusted Rand Index). We can also check how well this matches our initial classification of genes by peak of expression.

# Merge the data
plei_adjrand <- merge(
  plei_id_component,
  plei_modules_graph_annotation,
  by = 1,
  all.x = T
)
plei_adjrand <- merge(
  plei_adjrand,
  plei_genecolor_graph_annotation,
  by = 1
)

# Adjusted Rand Index
library(mclust)
plei_module_ctype_adjrandIndex <- adjustedRandIndex(
  x = plei_adjrand$module,
  y = plei_adjrand$genecolor
)
plei_module_CC_adjrandIndex <- adjustedRandIndex(
  x = plei_adjrand$module,
  y = plei_adjrand$member
)
plei_ctype_CC_adjrandIndex <- adjustedRandIndex(
  x = plei_adjrand$genecolor,
  y = plei_adjrand$member
)

As we can see we can use igraph to isolate WGCNA modules. alternatively we could subset the network to separate all genes from the same module into smaller subgraphs.

Graph Visualisation

We add the colors of modules and max ctype clustering and we observe the results:

V(plei_graph2)$color <- V(plei_graph2)$genecolor

plot(
    main = "Pristina network, color\nof cell type at which max expr",
    plei_graph2,
    vertex.size = 1.5,
    vertex.label = NA,
    edge.color = rgb(0,0,0,0.1),
    layout = plei_layout_kk
)

V(plei_graph2)$color <- V(plei_graph2)$newcolor
plot(
    main = "Pristina network, color\nof wgcna module",
    plei_graph2,
    vertex.size = 1.5,
    vertex.label = NA,
    edge.color = rgb(0,0,0,0.1),
    layout = plei_layout_kk
)

Analysis of connected components

Seeing that there is good agreement between WGCNA modules and TOM-derived CCs, we decide to explore these networks using a graph-based approach.

To have a better glimpse at the structure and features fo the different network modules, we can split the actual graph object in smaller graph objects containing the genes of each connected component.

We focus on centrality but future studies will expand to gene degree and assortativity.

#' seeing that there is good agreement between wgcna modules and TOM-derived CCs,
#' we decide to explore these networks using a graph-based approach
plei_grns_list <- divide_into_components( 
  x = plei_graph2,
  CCs = plei_id_component
)
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30

We can plot the looks of each network using this loop.

for (i in 1:length(plei_grns_list)) {
  l <- layout_with_mds(plei_grns_list[[i]])
  plot(
    main = paste0("mainCC ",i),
    plei_grns_list[[i]],
    vertex.size = 1.5,
    vertex.label = NA,
    edge.color = rgb(0,0,0,0.1),
    layout = l
  )
}

#add example graphs as a png grid of networks

We retrieve the centrality of all the TF genes in the module networks:

tfscentr_by_module <- centrality_by_network(plei_grns_list)

plei_top_central_tfs <- 
  unlist(
    sapply(
      tfscentr_by_module,
      function(x){
      if(length(x) < 3) {names(x)} else {names(rev(sort(x))[1:3])}
      }
      )
    )

plei_tfs_centrality_df <- 
  merge(
    data.frame(
      id = sub(".*\\.","",names(unlist(tfscentr_by_module))),
      module = sub("\\..*","",names(unlist(tfscentr_by_module))),
      centrality = unlist(tfscentr_by_module)
    ),
    plei_modules_table,
    by.x = 2,
    by.y = 5,
    all.x = TRUE
  ) [,c(2,3,1,7)] %>% 
  mutate(top_central = ifelse(id %in% plei_top_central_tfs,TRUE,FALSE)) %>%
  remove_rownames

colnames(plei_tfs_centrality_df) <- c("id","centrality","module","color","top_central")

head(plei_tfs_centrality_df)
##                  id  centrality                         module   color
## 1 PrileiEVm007974t1 0.002479127      module_anterior_intestine #228B22
## 2 PrileiEVm008526t1 0.002491907      module_anterior_intestine #228B22
## 3 PrileiEVm003039t1 0.007432334 module_anterior_mid_intestine2 #A2B63C
## 4 PrileiEVm004337t1 0.009085936 module_anterior_mid_intestine2 #A2B63C
## 5 PrileiEVm006596t1 0.007746694 module_anterior_mid_intestine2 #A2B63C
## 6 PrileiEVm017429t1 0.009064182 module_anterior_mid_intestine2 #A2B63C
##   top_central
## 1        TRUE
## 2        TRUE
## 3       FALSE
## 4        TRUE
## 5        TRUE
## 6        TRUE

There seems to be a link between TF centrality in a given network module and the number of TFs in said network module.

Using ggplot2 we can visualise the values of centrality of transcription factors in each module, highlighting the most central ones.

ggplot(
  plei_tfs_centrality_df,
  aes(
    x = forcats::fct_rev(module),
    y = log(centrality,10),
  )
) + 
  geom_jitter(
    position = position_jitter(0.6),
    fill = plei_tfs_centrality_df$color,
    shape = 21,
    size = 3
  ) +
  scale_color_manual(values = c("NA", "black"), guide="none")+
  theme(legend.position="none") +
  theme_minimal() +
  coord_flip() #add names to the top three

And to finally have a look in the network itself:

plei_toptfvertex <- which(V(plei_graph2)$name %in% plei_tfs_centrality_df$id)


plei_graph2_tfviz <- plei_graph2

V(plei_graph2_tfviz)$color <- alpha(colour="#ececec",alpha=0.5)
V(plei_graph2_tfviz)$color[plei_toptfvertex] <- V(plei_graph2)$color[plei_toptfvertex]
V(plei_graph2_tfviz)$size <- 1.5
V(plei_graph2_tfviz)$size[plei_toptfvertex] <- 2.2

plot(
  main = "Pristina network, top central tfs",
  plei_graph2_tfviz,
  #vertex.size = 1.5,
  vertex.label = NA,
  vertex.frame.color = rgb(0,0,0,0.15),
  edge.color = rgb(0.6,0.6,0.65,0.1),
  layout = plei_layout_kk
)

Exploring connections between modules

We generate a subgraph with more lax values of TOM score.

plei_graph_analysis <- subgraph.edges(
  plei_graph0,
  eids = which(E(plei_graph0)$weight >= 0.2), # try 0.2
  delete.vertices = TRUE
)

We parse this network to retrieve, for all the genes in the network, how many genes from each module connect to a given gene.

Take one gene x. This gene connects to a number of other genes. These other genes can be from different modules: for a gene in a muscle module, many of its neighbour genes will be from the muscle module, but some may be from epidermal, nervous, or whichever other module. This function counts how many genes from each module are direct neighbours to gene x.

This information is stored in a table where every gene is in a row and every column is a module. Values e.g. gene x - module j indicates how many genes from module j are direct neighbour from gene x.

plei_cross_connections <- as.data.frame(
  t(
    data.frame(
      lapply(
        V(plei_graph_analysis)$name,
        connections_to_module_per_gene,
        network = plei_graph_analysis,
        id_module = plei_id_module
      )
    )
  )
)
rownames(plei_cross_connections) <-  V(plei_graph_analysis)$name
colnames(plei_cross_connections) <- levels(plei_id_module$module)

plei_cross_connections <- merge(
  plei_cross_connections,
  plei_id_module,
  by.x = 0,
  by.y = 1
)
colnames(plei_cross_connections)[1] <- "id"

str(plei_cross_connections)
## 'data.frame':    9123 obs. of  42 variables:
##  $ id                            : 'AsIs' chr  "PrileiEVm000002t1" "PrileiEVm000004t1" "PrileiEVm000005t1" "PrileiEVm000006t1" ...
##  $ module_piwi                   : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ module_epidermis2             : int  0 1220 0 0 0 0 1133 364 0 0 ...
##  $ module_epidermis6             : int  0 41 0 0 0 0 14 0 0 0 ...
##  $ module_crop                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_anterior_intestine     : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ module_anterior_mid_intestine1: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_anterior_mid_intestine2: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_stomach1               : int  0 0 0 0 0 89 0 0 0 0 ...
##  $ module_stomach2a              : int  0 0 0 0 0 2 0 0 0 0 ...
##  $ module_stomach2b              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_posterior_intestine    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_caveolin               : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ module_cilia                  : int  0 0 0 25 0 0 0 0 0 0 ...
##  $ module_muscle2                : int  0 0 73 0 147 0 0 0 0 276 ...
##  $ module_muscle3                : int  0 0 1 0 1 0 0 0 0 0 ...
##  $ module_muscle4                : int  0 0 191 0 242 0 0 0 0 98 ...
##  $ module_neurons1               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_neurons2               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_neurons3               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_neurons4               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_neurons5               : int  0 0 0 0 0 0 0 0 34 0 ...
##  $ module_neurons6               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_neurons7               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_globin                 : int  490 0 0 0 0 0 0 0 0 0 ...
##  $ module_polycistin2            : int  0 1 0 15 0 0 0 0 0 0 ...
##  $ module_polycistin1            : int  0 0 0 173 0 0 0 0 0 0 ...
##  $ module_mmp24                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_fucolectin             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_pgrn                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_chaetal1               : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ module_chaetal2               : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ module_lipoxygenase           : int  1 1 0 0 0 0 1 0 0 0 ...
##  $ module_vigilin                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_lumbrokinase           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_carbmetabol            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_secretory1             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_secretory2             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_arginase               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_ldrr                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module_metanephridia          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ module                        : Factor w/ 40 levels "module_piwi",..: 24 2 16 26 14 8 2 2 21 14 ...

Plain sum of the connections might mask connections between smaller modules. Dividing the cross-connections matrix by the size of the volume of the gene x can help highlighting these connections.

# Think of a dplyr way...
size_cloud <- data.frame(
  table(plei_cross_connections$module)
  ) ; colnames(size_cloud) <-  c("id","size")


plei_cross_connections <- merge(
  plei_cross_connections,
  size_cloud,
  by.x = 42,
  by.y = 1
)

plei_cross_connections_norm <- 
  plei_cross_connections[,3:42]/plei_cross_connections[,43]
rownames(plei_cross_connections_norm) <- plei_cross_connections$id
plei_cross_connections_norm$module <- plei_cross_connections$module

From this table of cross-connections we will keep those genes that neighbour with genes from more than one module.

filt_more_one_module <- apply(
  plei_cross_connections_norm[,1:40],
  1,
  function(x){
    a <- length( which( x > 0 ) ) > 1
    return(a)
    }
  )

plei_cross_connections_norm <- 
  plei_cross_connections_norm[filt_more_one_module, ]

We aggregate these numbers by module to retrieve the number of cross-connections between modules.

plei_cross_connections_norm_module <- aggregate(
  plei_cross_connections_norm[1:40],
  by = list(Module = plei_cross_connections_norm$module),
  FUN = sum
  )

rownames(plei_cross_connections_norm_module) <- plei_cross_connections_norm_module$Module
plei_cross_connections_norm_module$Module <-  NULL

plei_cross_connections_norm_module <- 
  plei_cross_connections_norm_module[
    ,
    colSums(plei_cross_connections_norm_module) > 0
  ]

One possible way to visualise and integrate this information is through a heatmap.

## Warning: The input is a data frame-like object, convert it to a matrix.

But… there is another way to represent this information, and that is by doing another network. This time, we create a network using the number of cross-connections as a proxy for adjacency in the network.

plei_cross_connections_graph <- graph_from_adjacency_matrix(
  adjmatrix = as.matrix(plei_cross_connections_norm_module),
  add.colnames = "name",
  mode = "upper",
  weighted = TRUE,
  diag = FALSE,
)
V(plei_cross_connections_graph)$color <- plei_modules_table$newcolor
E(plei_cross_connections_graph)$width <-  log(E(plei_cross_connections_graph)$weight*5)
l <- layout_in_circle(plei_cross_connections_graph)
set.seed(1234)
plot(
  main = "connections between gene modules",
  plei_cross_connections_graph,
  edge.color = rgb(0,0,0,0.3),
  layout = l,
  vertex.label.dist=1.5,
  vertex.label.cex = 0.8,
  vertex.size = 12
)

A different layout:

set.seed(4343)
plot(
  main = "connections between gene modules",
  plei_cross_connections_graph,
  edge.color = rgb(0,0,0,0.3),
  vertex.label.dist=1.5,
  vertex.label.cex = 0.6,
  vertex.size = 12
)

To explore these connections we can dig into the biological processes where these connected genes might be working.

First we extract the neighbouring genes from the cross-connection table, assuming that genes with cross-connections between modules i and j are neighbours in these connections.

(At some point this should be deprecated in favor of a function that parses TOM or the graph based on real connections and not this downstream fishing method.)

plei_genes_connecting_pairs_modules <- cross_connected_genes(
  cross_connections = plei_cross_connections,
  id_modules = plei_id_module
)

plei_id_GO <-
  readMappings(
    "./data/04_eggNOGannotation/plei_eggnog_GOs.tsv"
  )

ciliacross_connection_GOs <- getGOs(
  genelist = list(
    genes = plei_genes_connecting_pairs_modules$module_cilia_module_polycistin1
    ),
  gene_universe = 
    rownames(plei_cpm),
  gene2GO = plei_id_GO
)
## [1] "Starting analysis 1 of 1"
ciliacross_connection_GOs$GOtable[[1]]
##         GO.ID                                                 Term Annotated
## 1  GO:0044782                                  cilium organization       487
## 2  GO:0060271                                      cilium assembly       475
## 3  GO:0120031     plasma membrane bounded cell projection assembly       783
## 4  GO:0030031                             cell projection assembly       808
## 5  GO:0007018                           microtubule-based movement       442
## 6  GO:0003341                                      cilium movement       171
## 7  GO:0070925                                   organelle assembly      1157
## 8  GO:0035082                                     axoneme assembly       101
## 9  GO:0007017                            microtubule-based process      1134
## 10 GO:0001578                         microtubule bundle formation       164
## 11 GO:0120036 plasma membrane bounded cell projection organization      2246
## 12 GO:0030030                         cell projection organization      2271
## 13 GO:0001539          cilium or flagellum-dependent cell motility       126
## 14 GO:0060285                       cilium-dependent cell motility       126
## 15 GO:0042073                               intraciliary transport        56
## 16 GO:0098840                  protein transport along microtubule        72
## 17 GO:0099118                  microtubule-based protein transport        72
## 18 GO:0060294            cilium movement involved in cell motility       104
## 19 GO:0070286                     axonemal dynein complex assembly        41
## 20 GO:0097722                                       sperm motility       102
## 21 GO:0030317                           flagellated sperm motility        99
## 22 GO:0099111                          microtubule-based transport       266
## 23 GO:0035735   intraciliary transport involved in cilium assembly        36
## 24 GO:0006928            movement of cell or subcellular component      2408
## 25 GO:1905515                           non-motile cilium assembly        83
## 26 GO:0036159                            inner dynein arm assembly        20
## 27 GO:0061512                       protein localization to cilium        67
## 28 GO:0007368                 determination of left/right symmetry       215
## 29 GO:0009799                            specification of symmetry       232
## 30 GO:0009855                  determination of bilateral symmetry       232
##    Significant Expected                      classicFisher
## 1          140    12.03 < 0.000000000000000000000000000001
## 2          136    11.73 < 0.000000000000000000000000000001
## 3          140    19.34 < 0.000000000000000000000000000001
## 4          140    19.96 < 0.000000000000000000000000000001
## 5          107    10.92 < 0.000000000000000000000000000001
## 6           74     4.22 < 0.000000000000000000000000000001
## 7          141    28.58 < 0.000000000000000000000000000001
## 8           54     2.49 < 0.000000000000000000000000000001
## 9          127    28.01 < 0.000000000000000000000000000001
## 10          55     4.05 < 0.000000000000000000000000000001
## 11         159    55.47 < 0.000000000000000000000000000001
## 12         159    56.09 < 0.000000000000000000000000000001
## 13          47     3.11 < 0.000000000000000000000000000001
## 14          47     3.11 < 0.000000000000000000000000000001
## 15          33     1.38 < 0.000000000000000000000000000001
## 16          33     1.78 < 0.000000000000000000000000000001
## 17          33     1.78 < 0.000000000000000000000000000001
## 18          37     2.57 < 0.000000000000000000000000000001
## 19          26     1.01 < 0.000000000000000000000000000001
## 20          35     2.52 < 0.000000000000000000000000000001
## 21          34     2.45  0.0000000000000000000000000000025
## 22          49     6.57  0.0000000000000000000000000000213
## 23          23     0.89  0.0000000000000000000000000000722
## 24         138    59.48  0.0000000000000000000000000189796
## 25          29     2.05  0.0000000000000000000000000315115
## 26          16     0.49  0.0000000000000000000000543957325
## 27          24     1.65  0.0000000000000000000003565695343
## 28          37     5.31  0.0000000000000000000032946850527
## 29          37     5.73  0.0000000000000000000494556058666
## 30          37     5.73  0.0000000000000000000494556058666

We can see that these neighbour genes across pairs of modules are sometimes enriched in functions linking to processes common to both cell types. In this example case, we see how genes highly expressed in polycystin positive cells that behave somewhat similarly to genes from the cilia module are also involved in cilium biology.

Saving the data

save(
  # table of counts
  plei_cpm,
  # graph from tom
  plei_graph0,
  # attributes and network with attributes
  plei_attributes_list,
  plei_graph2,
  plei_df_attr,
  plei_layout_kk,
  # components
  plei_id_component,
  mainCCs,
  plei_adjrand,
  # separate networks
  plei_grns_list,
  # TF centrality
  tfscentr_by_module,
  plei_tfs_centrality_df,
  plei_toptfvertex,
  plei_graph2_tfviz,
  # Cross_connections
  plei_cross_connections,
  plei_cross_connections_norm,
  plei_cross_connections_norm_module,
  plei_graph_analysis,
  plei_cross_connections_graph,
  plei_genes_connecting_pairs_modules,
  file = "./outputs/rda/04_plei_wgcna_graph_analysis.rda"
)